home *** CD-ROM | disk | FTP | other *** search
Wrap
(***************************************************

Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/

[Infos]
Authors=Antoine Potten
Title=IMDB
Description=Import data & picture from IMDB (optional image from Amazon)
Site=us.imdb.com
Language=EN
Version=1.0
Requires=3.5.0
Comments=Based on the script made for version 3.x by Antoine Potten, Danny Falkov, Kai Blankenhorn, lboregard, Ork, Trekkie, Youri Heijnen
License=This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
GetInfo=1

[Options]
ImageKind=0|1|0=No image|1=IMDB small image, from the main movie page|2=IMDB large image if found easily, else small image|3=First search for Amazon large image, then IMDB large one, then IMDB small image if other failed|4=First search for Amazon large image, then directly take IMDB small image if the first one failed|5=IMDB large image if found easily, else search for Amazon large image, then take IMDB small image if others failed
BatchMode=1|0|0=Normal working mode, prompts user when needed|1=Does not display any window, takes the first movie found|2=Same as 1, but it uses the URL field if available to update movie information
PopularSearches=1|1|0=Do not use the popular searches page, directly show full search results|1=Show popular searches first, I'll click on "Find more" if needed
ActorsLayout=0|0|0=Only actor names, separated by commas|1=Only actor names, separated by linebreaks|2=Actors names with character names between parenthesis separated by commas|3=Actors names with character names between parenthesis separated by linebreaks|4=Actor names like on IMDB page, with "...." and separated by linebreaks
MultipleValuesCountry=0|0|0=Only take first value for Country|1=Take full list, separated by commas|2=Take full list, separated by slashes
MultipleValuesCategory=0|0|0=Only take first value for Category|1=Take full list, separated by commas|2=Take full list, separated by slashes
MultipleValuesLanguages=0|0|0=Only take first value for Languages|1=Take full list, separated by commas|2=Take full list, separated by slashes
DescriptionSelection=1|1|0=Take the short summary, from main page (faster)|1=Show a list of available summaries|2=Take the longest summary
GetTagline=0|0|0=Do not get tagline|1=Put it in Description field, before the summary|2=Put it in the Comment field, before the comments

***************************************************)

program IMDB;

uses
  StringUtils1;

var
  MovieName: string;   // title or URL used to start the search
  MovieURL: string;    // URL of the movie page, built in AnalyzeMoviePage
  MovieNumber: string; // value of the hidden "arg" input on the movie page

// NOTE(review): this file was recovered from a whitespace-collapsed copy.
// String literals that contain spaces (e.g. '</tr> ', ' <a href="/rg') are
// reproduced exactly as they appeared there; confirm them against a pristine
// copy of the script if parsing misbehaves.

// ***** analyzes the results page that asks to select a movie from a list *****
// Address: URL to download. If the downloaded page title does not start with
// "IMDb" it is handed to AnalyzeMoviePage; otherwise it is treated as a list of
// search results (shown in the pick tree, or first entry taken in batch mode).

procedure AnalyzeResultsPage(Address: string);
var
  PageText: string;
  Value: string;
begin
  PageText := GetPage(Address);
  if pos('<title>IMDb', PageText) = 0 then
  begin
    AnalyzeMoviePage(PageText)
  end else
  begin
    if Pos('<b>No Matches.</b>', PageText) > 0 then
    begin
      if GetOption('BatchMode') = 0 then
        ShowMessage('No movie found for this search');
      Exit;
    end;
    if GetOption('BatchMode') = 0 then
    begin
      PickTreeClear;
      // Each <ol> is preceded by a bold heading; add the heading, then its items,
      // until an <ol> yields no item.
      repeat
        Value := TextBefore(PageText, '<ol>', '<b>');
        if Value <> '' then
        begin
          HTMLRemoveTags(Value);
          HTMLDecode(Value);
          PickTreeAdd(Value, '');
        end;
        Value := TextBetween(PageText, '<ol>', '</ol>');
        PageText := RemainingText;
      until not AddMovieTitles(Value);
      Value := TextBefore(PageText, '"><b>more titles</b></a>', '<a href="');
      if Value <> '' then
        PickTreeMoreLink('http://us.imdb.com' + Value);
      if PickTreeExec(Address) then
        AnalyzeResultsPage(Address);
    end else
    begin
      // Batch mode: take the first item of the first list.
      Value := TextBetween(TextBetween(PageText, '<ol>', '</ol>'), '<li>', '</li>');
      if Value <> '' then
      begin
        Value := TextBetween(Value, '<a href="', '">');
        if Value <> '' then
        begin
          // The href is site-relative (AddMovieTitles prefixes the same links
          // with the host), so make it absolute before downloading it.
          if Pos('http://', Value) <> 1 then
            Value := 'http://us.imdb.com' + Value;
          AnalyzeResultsPage(Value);
        end;
      end;
    end;
  end;
end;

// ***** adds the titles contained in <ol>'s items *****
// List: inner HTML of one <ol>. Every <li> is added to the pick tree with its
// link made absolute. Returns True if at least one item was added.

function AddMovieTitles(List: string): Boolean;
var
  Value: string;
  Address: string;
begin
  Result := False;
  Value := TextBetween(List, '<li>', '</li>');
  List := RemainingText;
  while Value <> '' do
  begin
    Address := TextBetween(Value, '<a href="', '">');
    HTMLRemoveTags(Value);
    HTMLDecode(Value);
    PickTreeAdd(Value, 'http://us.imdb.com' + Address);
    Result := True;
    Value := TextBetween(List, '<li>', '</li>');
    List := RemainingText;
  end;
end;

// ***** analyzes the page containing movie information *****
// PageText: HTML of the movie page. Fills every field for which CanSetField
// returns True, and the picture according to the ImageKind option.

procedure AnalyzeMoviePage(PageText: string);
var
  Value, Value2, FullValue: string;
begin
  MovieNumber := TextBetween(PageText, '<input type="hidden" name="arg" value="', '"><input');
  MovieURL := 'http://us.imdb.com/title/tt' + MovieNumber;
  // URL
  if CanSetField(fieldURL) then
    SetField(fieldURL, MovieURL);
  // Original Title & Year
  if CanSetField(fieldOriginalTitle) or CanSetField(fieldYear) then
  begin
    Value := TextBetween(PageText, '<title>', '</title>');
    Value2 := TextBefore(Value, ' (', '');
    Value := RemainingText;
    HTMLDecode(Value2);
    if CanSetField(fieldOriginalTitle) then
      SetField(fieldOriginalTitle, Value2);
    // The year is cut at '/' when one is present, else at the closing parenthesis.
    if Pos('/', Value) > 0 then
      Value2 := TextBefore(Value, '/', '')
    else
      Value2 := TextBefore(Value, ')', '');
    if CanSetField(fieldYear) then
      SetField(fieldYear, Value2);
  end;
  // Rating
  if CanSetField(fieldRating) then
  begin
    Value := TextBetween(PageText, '/rating-stars/', '/rating-vote/');
    SetField(fieldRating, TextBetween(Value, '<b>', '/'));
  end;
  // Picture
  if CanSetPicture then
  begin
    case GetOption('ImageKind') of
      1:
        ImportSmallPicture(PageText);
      2:
        if not ImportLargePicture('http://us.imdb.com/gallery/ss/' + MovieNumber) then
          ImportSmallPicture(PageText);
      3:
        if not ImportAmazonPicture(PageText) then
          if not ImportLargePicture('http://us.imdb.com/gallery/ss/' + MovieNumber) then
            ImportSmallPicture(PageText);
      4:
        if not ImportAmazonPicture(PageText) then
          ImportSmallPicture(PageText);
      5:
        if not ImportLargePicture('http://us.imdb.com/gallery/ss/' + MovieNumber) then
          if not ImportAmazonPicture(PageText) then
            ImportSmallPicture(PageText);
    end;
  end;
  // Director
  if CanSetField(fieldDirector) then
  begin
    Value := TextBetween(PageText, '<b class="blackcatheader">Directed by</b><br>', '<br>' + #13);
    Value := StringReplace(TextAfter(Value, '">'), '<br>', ', ');
    HTMLRemoveTags(Value);
    HTMLDecode(Value);
    SetField(fieldDirector, Value);
  end;
  // Actors
  if CanSetField(fieldActors) then
  begin
    // First letter left out of the search strings ("Cast overview" / "Credited cast").
    Value := TextBetween(PageText, 'ast overview', '</div>');
    if Value = '' then
      Value := TextBetween(PageText, 'redited cast', '</div>');
    if Value <> '' then
    begin
      Value := TextAfter(Value, '</tr> ');
      FullValue := '';
      case GetOption('ActorsLayout') of
        0, 1:
          while Pos('<tr>', Value) > 0 do
          begin
            Value2 := TextBetween(Value, '<tr>', '</tr>');
            Value := RemainingText;
            if Pos('<a href="fullcredits">(more)</a>', Value2) > 0 then
              Break;
            if FullValue <> '' then
              FullValue := FullValue + #13#10;
            FullValue := FullValue + TextBefore(Value2, '</td>', '');
          end;
        2, 3:
          while Pos('<tr>', Value) > 0 do
          begin
            Value2 := TextBetween(Value, '<tr>', '</tr>');
            Value := RemainingText;
            if Pos('<a href="fullcredits">(more)</a>', Value2) > 0 then
              Break;
            if FullValue <> '' then
              FullValue := FullValue + #13#10;
            FullValue := FullValue + TextBefore(Value2, '</td>', '');
            // Character name: read from what is left of the row after the actor cell.
            Value2 := TextBetween(RemainingText, '<td valign="top">', '</td>');
            if Value2 <> '' then
              FullValue := FullValue + ' (as ' + Value2 + ')';
          end;
        4:
          begin
            FullValue := TextBefore(Value, '</tr><tr><td colspan="2">', '');
            if FullValue = '' then
              FullValue := Value;
            FullValue := StringReplace(FullValue, '</tr>', #13#10);
          end;
      end;
      HTMLRemoveTags(FullValue);
      HTMLDecode(FullValue);
      // Layouts 0 and 2 want commas instead of linebreaks.
      case GetOption('ActorsLayout') of
        0, 2:
          FullValue := StringReplace(FullValue, #13#10, ', ');
      end;
      SetField(fieldActors, FullValue);
    end;
  end;
  // Country
  if CanSetField(fieldCountry) then
  begin
    SetField(fieldCountry, ImportList(PageText, GetOption('MultipleValuesCountry'), '/Countries/'));
  end;
  // Category
  if CanSetField(fieldCategory) then
  begin
    SetField(fieldCategory, ImportList(PageText, GetOption('MultipleValuesCategory'), '/Genres/'));
  end;
  // Language
  if CanSetField(fieldLanguages) then
  begin
    SetField(fieldLanguages, ImportList(PageText, GetOption('MultipleValuesLanguages'), '/Languages/'));
  end;
  // Description
  if CanSetField(fieldDescription) then
  begin
    Value := TextBetween(PageText, '<b class="ch">Plot Outline:</b>', '<br><br>');
    if Value = '' then
      Value := TextBetween(PageText, '<b class="ch">Plot Summary:</b>', '<br><br>');
    if Value <> '' then
      SetField(fieldDescription, ImportSummary(Value));
  end;
  // Comments
  if CanSetField(fieldComments) then
  begin
    Value := TextAfter(PageText, '/comments">');
    if Value <> '' then
    begin
      Value := TextBetween(Value, '<p>', '</p>');
      Value := StringReplace(Value, #13#10, ' ');
      Value := StringReplace(Value, '<br>', #13#10);
      HTMLRemoveTags(Value);
      HTMLDecode(Value);
      Value := Trim(Value);
      // Collapse runs of spaces. The pattern must be TWO spaces (#32#32):
      // with a single space the loop would never terminate.
      while Pos(#32#32, Value) > 0 do
        Value := StringReplace(Value, #32#32, ' ');
      // Strip leading linebreaks.
      while Pos(#13#10, Value) = 1 do
        Delete(Value, 1, 2);
      SetField(fieldComments, Value);
    end;
  end;
  // Length
  if CanSetField(fieldLength) then
  begin
    Value := TextBetween(PageText, '<b class="ch">Runtime:</b>' + #13#10, ' ');
    if Value <> '' then
    begin
      // When the value contains ':', only the part after it is kept.
      if Pos(':', Value) > 0 then
        SetField(fieldLength, TextAfter(Value, ':'))
      else
        SetField(fieldLength, Value);
    end;
  end;
  // TagLine
  if GetOption('GetTagline') > 0 then
  begin
    Value := TextBetween(PageText, 'Tagline:</b>', #13);
    if Pos('<a', Value) > 0 then
      Value := TextBefore(Value, '<a', '');
    HTMLRemoveTags(Value);
    HTMLDecode(Value);
    Value := Trim(Value);
    if Value <> '' then
    begin
      Value := '"' + Value + '"';
      // Prepend the quoted tagline to the field selected by the option.
      case GetOption('GetTagline') of
        1:
          if CanSetField(fieldDescription) then
            SetField(fieldDescription, Value + #13#10 + GetField(fieldDescription));
        2:
          if CanSetField(fieldComments) then
            SetField(fieldComments, Value + #13#10 + GetField(fieldComments));
      end;
    end;
  end;
end;

// ***** Imports lists like Genre, Country, etc. depending on the selected option *****
// PageText: movie page HTML; StartTag: link fragment that starts the list.
// MultipleValues: 0 = first value only, 1 = full list with ' / ' replaced by ', ',
// any other value = full list left as found on the page.

function ImportList(PageText: string; MultipleValues: Integer; StartTag: string): string;
var
  Value, Value2: string;
begin
  if MultipleValues = 0 then
  begin
    Value := TextBetween(PageText, StartTag, '</a>');
    Value2 := TextAfter(Value, '">');
  end else
  begin
    Value := TextBetween(PageText, StartTag, #13#10);
    // Cut the line before a trailing "/rg" link when there is one.
    Value2 := TextBefore(Value, ' <a href="/rg', '');
    if Value2 <> '' then
      Value := Value2;
    Value2 := TextAfter(Value, '">');
    HTMLRemoveTags(Value2);
    if MultipleValues = 1 then
      Value2 := StringReplace(Value2, ' / ', ', ');
  end;
  HTMLDecode(Value2);
  Result := Value2;
end;

// ***** functions to import the different pictures kinds, depending on the option selected by user *****

// Fetches the image tagged alt="cover" on the movie page. Returns True if one was found.
function ImportSmallPicture(PageText: string): Boolean;
var
  Value: string;
begin
  Result := False;
  Value := TextBetween(PageText, '<img border="0" alt="cover" src="', '"');
  if Value <> '' then
  begin
    GetPicture(Value);
    Result := True;
  end;
end;

// Downloads the gallery page at Address and tries a series of known poster file
// names; when none matches, follows the link found after "There are" and retries.
// Returns True as soon as a picture has been fetched.
function ImportLargePicture(Address: string): Boolean;
var
  Value: string;
begin
  Result := True;
  Value := GetPage(Address);
  if SearchForLargePicture(Value, 'Onesheet_text', False) then
    Exit;
  if SearchForLargePicture(Value, 'keyart01', True) then
    Exit;
  if SearchForLargePicture(Value, 'keyart02', True) then
    Exit;
  if SearchForLargePicture(Value, 'oster', True) then // poster, usposter, Poster
    Exit;
  if SearchForLargePicture(Value, 'pos01', True) then
    Exit;
  if SearchForLargePicture(Value, 'KeyArt', True) then
    Exit;
  if SearchForLargePicture(Value, 'heet', True) then // Sheet & Onesheet
    Exit;
  if SearchForLargePicture(Value, 'OneSheetv2', True) then
    Exit;
  if SearchForLargePicture(Value, 'artwork', True) then
    Exit;
  if SearchForLargePicture(Value, 'text', True) then
    Exit;
  Address := TextBetween(Value, 'There are ' + #13#10 + '<a href="', '">');
  if Address <> '' then
    Result := ImportLargePicture('http://us.imdb.com' + Address)
  else
    Result := False;
end;

// Looks in PageText for a thumbnail named 'th-<Name>.jpg' and, when found, fetches
// the same path without the 'th-' prefix. With PartialName, Name is first completed
// with whatever precedes it in the file name (text after the last '/').
function SearchForLargePicture(PageText: string; Name: string; PartialName: Boolean): Boolean;
var
  Value: string;
begin
  Result := False;
  if PartialName then
  begin
    Value := TextBefore(PageText, Name + '.jpg', '/');
    if Value = '' then
      Exit
    else
      Name := Value + Name;
  end;
  Value := TextBefore(PageText, 'th-' + Name + '.jpg', 'src="');
  if Value <> '' then
  begin
    GetPicture(Value + Name + '.jpg');
    Result := True;
  end;
end;

// Follows the "DVD available" link of the movie page and fetches the large
// variant (LZZZZZZZ.jpg) of the first thumbnail found there. Returns True on success.
function ImportAmazonPicture(PageText: string): Boolean;
var
  Value: string;
begin
  Result := False;
  Value := TextBefore(PageText, '" title="DVD available', '<a href="');
  if Value = '' then
    Exit;
  PageText := GetPage('http://us.imdb.com' + Value);
  if Pos('unable to find exact matches', PageText) > 0 then
    Exit;
  // Ignore thumbnails belonging to the "also interested in" suggestions.
  if Pos('You may also be interested in these items...', PageText) > 0 then
    PageText := TextBefore(PageText, 'You may also be interested in these items...', '');
  Value := TextBefore(PageText, 'TZZZZZZZ.jpg', '<img src="');
  if Value = '' then
    Value := TextBefore(PageText, 'THUMBZZZ.jpg', '<img src="');
  if Value <> '' then
  begin
    GetPicture(Value + 'LZZZZZZZ.jpg');
    Result := True;
  end;
end;

// ***** Gets summaries for the movie, based on the plot outline given in parameter (that contains the URL to more summaries) *****
// Returns the short outline when there is no "(more)" link or DescriptionSelection
// is 0; otherwise downloads the summaries page and returns the longest summary
// (batch mode or DescriptionSelection = 2) or the one picked by the user
// (empty string if the pick list is cancelled).

function ImportSummary(PlotText: string): string;
var
  Address, Value, PageText, Longest: string;
begin
  Address := TextBetween(PlotText, '<a href="/rg/title-tease/plotsummary', '">(more)</a>');
  if (Address = '') or (GetOption('DescriptionSelection') = 0) then
  begin
    Result := Trim(TextBefore(PlotText, '<a href="/rg', ''));
    if Result = '' then
      Result := Trim(PlotText);
    HTMLRemoveTags(Result);
    HTMLDecode(Result);
  end else
  begin
    PageText := GetPage('http://us.imdb.com/rg/title-tease/plotsummary' + Address);
    PickListClear;
    Longest := '';
    Value := TextBetween(PageText, '<p class="plotpar">', '</p>');
    PageText := RemainingText;
    while Value <> '' do
    begin
      Value := StringReplace(Value, #13#10, ' ');
      Value := StringReplace(Value, '<br>', #13#10);
      HTMLRemoveTags(Value);
      HTMLDecode(Value);
      // Collapse runs of spaces. The pattern must be TWO spaces (#32#32):
      // with a single space the loop would never terminate.
      while Pos(#32#32, Value) > 0 do
        Value := StringReplace(Value, #32#32, ' ');
      if Length(Value) > Length(Longest) then
        Longest := Value;
      PickListAdd(Trim(Value));
      Value := TextBetween(PageText, '<p class="plotpar">', '</p>');
      PageText := RemainingText;
    end;
    if (GetOption('BatchMode') > 0) or (GetOption('DescriptionSelection') = 2) then
      Result := Longest
    else
    begin
      if not PickListExec('Select a description for "' + GetField(fieldOriginalTitle) + '"', Result) then
        Result := '';
    end;
  end;
end;

// ***** beginning of the program *****

begin
  if CheckVersion(3,5,0) then
  begin
    MovieName := '';
    // BatchMode 2: reuse the stored URL when it points to IMDB.
    if GetOption('BatchMode') = 2 then
    begin
      MovieName := GetField(fieldURL);
      if Pos('imdb.com', MovieName) = 0 then
        MovieName := '';
    end;
    if MovieName = '' then
      MovieName := GetField(fieldOriginalTitle);
    if MovieName = '' then
      MovieName := GetField(fieldTranslatedTitle);
    if GetOption('BatchMode') = 0 then
    begin
      if not Input('IMDB Import', 'Enter the title or the IMDB URL of the movie:', MovieName) then
        Exit;
    end else
      Sleep(500);
    if MovieName <> '' then
    begin
      if Pos('imdb.com', MovieName) > 0 then
        AnalyzeResultsPage(MovieName)
      else
      begin
        MovieName := StringReplace(MovieName, '&', 'and');
        if (GetOption('BatchMode') > 0) or (GetOption('PopularSearches') = 1) then
          AnalyzeResultsPage('http://us.imdb.com/find?tt=1;q=' + UrlEncode(MovieName))
        else
          AnalyzeResultsPage('http://us.imdb.com/find?more=tt;q=' + UrlEncode(MovieName));
      end;
    end;
  end else
    ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.5.0)');
end.